Temporal overview
p_year %>%
inner_join(poems,by=c("p_id")) %>%
count(collection,year) %>%
mutate(measure="yearly count") %>%
union_all(
p_year %>% # 10 year rolling mean
distinct(year) %>%
left_join(p_year %>% distinct(year),sql_on="RHS.year BETWEEN LHS.year-5 AND LHS.year+4") %>%
inner_join(p_year,by=c("year.y"="year")) %>%
inner_join(poems,by=c("p_id")) %>%
group_by(collection=collection,year=year.x) %>%
summarize(n=n()/n_distinct(year.y),.groups="drop") %>%
mutate(measure="10 year rolling mean")
) %>%
filter(collection!="literary",!year %in% c(0,9999)) %>%
mutate(year=if_else(year>=1800,year,1780)) %>%
group_by(collection,measure,year) %>%
summarise(n=sum(n),.groups="drop") %>%
collect() %>%
complete(year,collection,measure,fill=list(n=0)) %>%
mutate(collection=fct_relevel(str_to_upper(collection),"ERAB","SKVR","JR")) %>%
group_by(collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>4600,xoutlier=year<1800) %>%
ggplot(aes(x=year,y=n,color=collection)) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=5000) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=5000, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,4600),xlim=c(1800,1970),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=1000),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")
## Warning: Missing values are always removed in SQL aggregation functions.
## Use `na.rm = TRUE` to silence this warning
## This warning is displayed once every 8 hours.

# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
top_top_types <- p_typ %>%
filter(!is_minor) %>%
inner_join(poems) %>%
inner_join(types_to_top_level_types) %>%
count(collection, ancestor_t_id) %>%
group_by(collection) %>%
slice_max(n,n=9) %>%
ungroup() %>%
mutate(top_type=TRUE) %>%
select(ancestor_t_id,top_type) %>%
compute_a(temporary=TRUE, overwrite=TRUE)
d <- p_year %>%
inner_join(poems,by=c("p_id")) %>%
left_join(p_typ %>%
filter(!is_minor) %>%
inner_join(types_to_top_level_types %>%
inner_join(types %>%
filter(!str_detect(type_orig_id,"^erab_orig")) %>%
select(ancestor_t_id=t_id,ancestor_type_name=name))) %>%
left_join(top_top_types) %>%
mutate(
ancestor_type_name=if_else(!is.na(top_type),ancestor_type_name,"Muut"),
ancestor_t_id=if_else(!is.na(top_type),ancestor_t_id,-1),
)
) %>%
replace_na(list(ancestor_type_name="Tuntematon", ancestor_t_id=-2)) %>%
distinct(ancestor_t_id,ancestor_type_name, collection, year, p_id) %>%
count(ancestor_t_id,ancestor_type_name, collection, year) %>%
mutate(measure="yearly count") %>%
union_all(
p_year %>% # 10 year rolling mean
distinct(year) %>%
left_join(p_year %>% distinct(year),sql_on="RHS.year BETWEEN LHS.year-5 AND LHS.year+4") %>%
inner_join(p_year,by=c("year.y"="year")) %>%
inner_join(poems,by=c("p_id")) %>%
left_join(p_typ %>%
inner_join(types_to_top_level_types %>%
inner_join(types %>%
filter(!str_detect(type_orig_id,"^erab_orig")) %>%
select(ancestor_t_id=t_id,ancestor_type_name=name))) %>%
left_join(top_top_types) %>%
mutate(
ancestor_type_name=if_else(!is.na(top_type),ancestor_type_name,"Muut"),
ancestor_t_id=if_else(!is.na(top_type),ancestor_t_id,-1),
)
) %>%
replace_na(list(ancestor_type_name="Tuntematon", ancestor_t_id=-2)) %>%
distinct(ancestor_t_id,ancestor_type_name, collection, year.x, year.y, p_id) %>%
group_by(ancestor_t_id,ancestor_type_name, collection, year=year.x) %>%
summarize(n=n()/n_distinct(year.y),.groups="drop") %>%
mutate(measure="10 year rolling mean")
) %>%
filter(collection!="literary",!year %in% c(0L,9999L)) %>%
mutate(year=if_else(year>=1800L,year,1780L)) %>%
group_by(ancestor_type_name, collection, measure, year) %>%
summarise(n=sum(n),.groups="drop") %>%
collect()
d %>%
mutate(collection=fct_relevel(str_to_upper(collection),"SKVR","ERAB","JR")) %>%
filter(collection=="SKVR") %>%
complete(ancestor_type_name, year,collection,measure,fill=list(n=0)) %>%
group_by(ancestor_type_name, collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>1300,xoutlier=year<1800) %>%
ggplot(aes(x=year,y=n,color=ancestor_type_name)) +
# facet_wrap(~collection) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE,xoutlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=1400) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=1400, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,1300),xlim=c(1800,1940),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=500),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")

# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
youtlier_limit = 2000
d %>%
mutate(collection=fct_relevel(str_to_upper(collection),"SKVR","ERAB")) %>%
filter(collection=="ERAB") %>%
complete(ancestor_type_name, year,collection,measure,fill=list(n=0)) %>%
group_by(ancestor_type_name, collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>youtlier_limit,xoutlier=year<1800) %>%
mutate(ancestor_type_name=case_match(ancestor_type_name,
"Laulud noorrahva elust" ~ "Laulut nuorison elämästä (Laulud noorrahva elust)",
"Muut" ~ "Muut (sisältää 17 luokkaa)",
"Laulud meelelahutamiseks" ~ "Viihdytyslaulut (Laulud meelelahutamiseks)",
"Lüroeepilised laulud" ~ "Lyroeeppiset laulut (Lüroeepilised laulud)",
"Laulud laulust" ~ "Laulut laulusta (Laulud laulust)",
"Töölaulud" ~ "Työlaulut (Töölaulud)",
"Looduslaulud" ~ "Laulut luonnosta (Looduslaulud)",
"Laulud ühiskondlikest vahekordadest" ~ "Laulut yhteiskunnallisista suhteista\n(Laulud ühiskondlikest vahekordadest)",
"Murelaulud" ~ "Huolilaulut (Murelaulud)",
"Laulud abielust" ~ "Laulut avioelämästä (Laulud abielust)",
"Kalendrilaulud" ~ "Kalendaarilaulut (Kalendrilaulud)",
.default=ancestor_type_name
)) %>%
mutate(ancestor_type_name=fct_reorder(ancestor_type_name,n,.fun=sum,.desc=TRUE)) %>%
mutate(ancestor_type_name=fct_relevel(ancestor_type_name, "Muut (sisältää 17 luokkaa)", after=Inf)) %>%
ggplot(aes(x=year,y=n,color=ancestor_type_name)) +
# facet_wrap(~collection) +
geom_point(data=. %>% filter(measure=="yearly count",youtlier==FALSE,xoutlier==FALSE),size=0.5) +
geom_point(data=. %>% filter(youtlier==TRUE),aes(x=year),y=youtlier_limit+100) +
geom_text_repel(data=. %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=youtlier_limit+100, show.legend=FALSE) +
geom_point(data=. %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n)) +
geom_text_repel(data=. %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=1785,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=. %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,youtlier_limit),xlim=c(1800,1950),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=200),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
# guides(color=guide_legend(nrow=2)) +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")

# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
d %>%
mutate(collection=fct_relevel(str_to_upper(collection),"SKVR","ERAB","JR")) %>%
filter(collection=="JR") %>%
complete(ancestor_type_name, year,collection,measure,fill=list(n=0)) %>%
group_by(ancestor_type_name, collection,measure) %>%
arrange(year) %>%
filter(n!=0 | lag(n)!=0 | lead(n)!=0) %>%
ungroup() %>%
mutate(youtlier=n>6500,xoutlier=year<1800) %>%
ggplot(aes(x=year,y=n,color=ancestor_type_name)) +
# facet_wrap(~collection) +
geom_point(data=~.x %>% filter(measure=="yearly count",youtlier==FALSE),size=0.5) +
geom_point(data=~.x %>% filter(youtlier==TRUE),aes(x=year),y=7200) +
geom_text_repel(data=~.x %>% filter(youtlier==TRUE),aes(x=year,label=scales::number(n)),y=7200, show.legend=FALSE) +
geom_point(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n)) +
geom_text_repel(data=~.x %>% filter(xoutlier==TRUE,measure=="yearly count"),aes(x=year,y=n,label=scales::number(n)), show.legend=FALSE) +
geom_line(data=~.x %>% filter(xoutlier==FALSE,measure=="10 year rolling mean")) +
theme_hsci_discrete(base_family="Arial") +
theme(
legend.justification=c(0,1),
legend.position=c(0.02, 0.98),
legend.background = element_blank(),
legend.key=element_blank()
) +
labs(color=NULL) +
coord_cartesian(ylim=c(0,6500),xlim=c(1800,1960),clip="off") +
scale_y_continuous(breaks=seq(0,20000,by=1000),labels=scales::number) +
# ylab("Poems") +
ylab("Runojen määrä") +
scale_x_continuous(breaks=seq(1000,2000,by=10)) +
# xlab("Year") +
xlab("Vuosi") +
ggtitle("")

# ggtitle("Runojen määrä vuosittain ja kokoelmittain")
# ggtitle("Number of poems by year and collection")
p_year %>%
filter(year %in% c(0,9999)) %>%
left_join(poems) %>%
count(collection,year) %>%
ungroup() %>%
gt() %>%
tab_header(title="Abnormal years") %>%
fmt_integer(n)
| Abnormal years |
| collection |
year |
n |
| skvr |
9999 |
469 |
| erab |
0 |
6,670 |